#Load packages
library("ggplot2")
library("multiwayvcov")
library("miceadds")
library("plyr")
library("dplyr")
library("stats")
library("rmarkdown")

#Read in dataset
setwd("C:/Users/geoff/OneDrive")
df <- read.csv("JEPS Final Data.csv")
summary(df)
str(df)

#Make block variable a factor
df$block<-as.factor(df$block)

#DEMOGRAPHICS OF SAMPLE
#Calculate demographic statistics
table(df$racename)
12787/(8958+12787+14154+3411+1+2103) #30.88 percent Asian/Pacific Islander
8958/(8958+12787+14154+3411+1+2103) #21.63 percent Black
3411/(8958+12787+14154+3411+1+2103) #8.24 percent Latinx
14154/(8958+12787+14154+3411+1+2103) #34.18 percent White
table(df$age)
df$youth<-NA
df$youth[df$age<35]<-1
df$youth[df$age>=35]<-0
table(df$youth)
24718/(24718+16696) #59.69 percent under 35

#CONTACT RATES
#Door-to-door canvassing
table(df$canvassed)
2640/(16224+2640) #14 percent
#Contact rates for texts and digital ads provided by partner organizations
#Texts: 25 percent
#Digital ads: ranged from 23 to 37 percent
#E-mail contact rates
#Tally e-mails sent
table(df$org1_emailsent) #9
table(df$org2_emailsent) #32
table(df$org3_emailsent) #124
table(df$org4_emailsent) #38
table(df$org5_emailsent) #96
#Identify number of voters assigned to treatment
table(df$assignment) #20722 voters assigned to treatment
#Calculate contact rate
(32+124+38+96+9)/20722 #Contact rate: 1.44 percent

#VOTING HISTORY CATEGORIES
#Create subsets by voting history
df.highprop<-df[df$highprop==1,]
df.medhigh<-df[df$medhigh==1,]
df.medlow<-df[df$medlow==1,]
df.lowprop<-df[df$lowprop==1,]
df.newreg<-df[df$newreg==1,]
df.unreg<-df[df$unreg==1,]

#RATES OF VOUCHER USE 
#Among experimental universe
table(df$voucher)
599/(599+40815) #1.45 percent
#Calculate rates of voucher use among voter history subsets
table(df.highprop$voucher)
113/(113+3439) #3.18 percent
table(df.medhigh$voucher)
171/(171+7220) #2.31 percent
table(df.medlow$voucher)
227/(227+18268) #1.23 percent
table(df.lowprop$voucher)
45/(45+8652) #0.52 percent
table(df.newreg$voucher)
47/(47+2528) #1.83 percent
table(df.unreg$voucher) #0 percent
#Rates of voucher use among Win/Win members and non-members
table(df$voucher,df$member)
179/(179+6197) #2.81 percent of members used vouchers
420/(420+34618) #1.20 percent of non-members used vouchers


#TABLE 1. ITT EFFECTS OF VOTER MOBILIZATION ON VOUCHER USE
#Count observations for Model 1
model.lpm<-lm(voucher~assignment+block,data=df)
nobs(model.lpm)
#Model 1
model1 <- miceadds::lm.cluster( data = df , formula = voucher ~ assignment+block, 
                              cluster = "householdID" )
summary(model1)
#Count observations for Model 2
model.lpm.ctrl<-lm(voucher ~ assignment+educationscale+incomescale+age+female+black+asian+latinx+block, data = df)
nobs(model.lpm.ctrl)
#Model 2
model2 <- miceadds::lm.cluster( data = df , formula = voucher ~ assignment+educationscale+incomescale+age+female+black+asian+latinx+block, 
                              cluster = "householdID" )
summary(model2)
#Count observations for Model 3
h23test<-lm(voucher~assignment+highprop+member+assignment*highprop+assignment*member+block,data=df)
nobs(h23test)
#Model 3
model3 <- miceadds::lm.cluster( data = df , formula = voucher~assignment+highprop+member+assignment*highprop+assignment*member+block, 
                                         cluster = "householdID" )
summary(model3)
#Count observations for Model 4
h23test.ctrl<-lm(voucher~assignment+highprop+member+assignment*highprop+assignment*member+educationscale+incomescale+age+female+black+asian+latinx+block,data=df)
nobs(h23test.ctrl)
#Model 4
model4 <- miceadds::lm.cluster( data = df , formula = voucher~assignment+highprop+member+assignment*highprop+assignment*member+educationscale+incomescale+age+female+black+asian+latinx+block, 
                                              cluster = "householdID" )
summary(model4)


#HETEROGENEOUS TREATMENT EFFECTS ON VOUCHER USE
#Treatment effect on low-propensity voters
#Count observations for Model 5
model.lpm.lowprop<-lm(voucher~assignment+block,data=df.lowprop)
nobs(model.lpm.lowprop)
#Model 5
model5 <- miceadds::lm.cluster( data = df.lowprop , formula = voucher ~ assignment+block, 
                                              cluster = "householdID" )
summary(model5)

#Treatment effect on medium-low-propensity voters
#Count observations for Model 6
model.lpm.medlow<-lm(voucher ~ assignment+block,data=df.medlow)
nobs(model.lpm.medlow)
#Model 6
model6 <- miceadds::lm.cluster( data = df.medlow , formula = voucher ~ assignment+block, 
                               cluster = "householdID" )
summary(model6)

#Treatment effect on medium-high-propensity voters
#Count observations for Model 7
model.lpm.medhigh<-lm(voucher~assignment+block,data=df.medhigh)
nobs(model.lpm.medhigh)
#Model 7
model7 <- miceadds::lm.cluster( data = df.medhigh , formula = voucher ~ assignment+block, 
                                              cluster = "householdID" )
summary(model7)

#Treatment effect on high-propensity voters
#Count observations for Model 8
model.lpm.highprop<-lm(voucher~assignment+block,data=df.highprop)
nobs(model.lpm.highprop)
#Model 8
model8 <- miceadds::lm.cluster( data = df.highprop , formula = voucher ~ assignment+block, 
                                cluster = "householdID" )
summary(model8)


#FIGURE 2. ITT EFFECTS BY VOTING HISTORY
#Create dataframe for Figure 2
hte.df<-as.data.frame(rbind(summary(model5)[2,c(1,2)],
                                      summary(model6)[2,c(1,2)],
                                      summary(model7)[2,c(1,2)],
                                      summary(model8)[2,c(1,2)]),
                                stringsAsFactors = FALSE)

#Name variables
names(hte.df) <- c("point.estimate", "se")

#Calculate confidence intervals
hte.df$se.high <- hte.df$point.estimate + hte.df$se
hte.df$se.low <- hte.df$point.estimate - hte.df$se
hte.df$ci.high <- hte.df$point.estimate + hte.df$se * 1.96
hte.df$ci.low <- hte.df$point.estimate - hte.df$se * 1.96

#Vote history labels
hte.df$Subgroups<-c("1. Low Propensity","2. Medium Low Propensity","3. Medium High Propensity","4. High Propensity")

#Set positions of vote history categories
hte.df$xpos<-c(1:4)

#Create figure
fig2<- ggplot(hte.df,
            aes(x=xpos, y=point.estimate*100, color=Subgroups)) +
  theme_classic() +
  #Plot confidence intervals
  geom_linerange(aes(ymin=se.low*100, ymax=se.high*100), lwd=1) +
  geom_linerange(aes(ymin=ci.low*100, ymax=ci.high*100)) +
  #Plot point estimates
  geom_point(color="black") +
  #Label y axis
  ylab("Effect on Voucher Use, in Percentage Points") + 
  #Label x axis, add y=0
  xlab("Voting History") +
  geom_hline(yintercept = 0) +
  #Add title, legend
  ggtitle("Figure 2. ITT Effects by Voting History") +
  theme(legend.position = "bottom")

fig2

#ANALYSIS OF VOTER TURNOUT
#Test of H1
#Count observations for Model 9
model.lpm.voted<-lm(voted~assignment+block, data=df)
nobs(model.lpm.voted)
#Model 9
model9 <- miceadds::lm.cluster( data = df , formula = voted ~ assignment+block, 
                                            cluster = "householdID" )
summary(model9)
#With controls
#Count observations for Model 10
model.lpm.voted.ctrl<-lm(voted ~ assignment+educationscale+incomescale+age+female+black+asian+latinx+block,data=df)
nobs(model.lpm.voted.ctrl)
#Model 10
model10 <- miceadds::lm.cluster( data = df , formula = voted ~ assignment+educationscale+incomescale+age+female+black+asian+latinx+block, 
                                                 cluster = "householdID" )
summary(model10)
#Test of H2, H3
#Count observations for Model 11
model.lpm.voted.int<-lm(voted ~ assignment+member+highprop+assignment*member+assignment*highprop+block,data=df)
nobs(model.lpm.voted.int)
#Model 11
model11 <- miceadds::lm.cluster( data = df , formula = voted ~ assignment+member+highprop+assignment*member+assignment*highprop+block, 
                                                cluster = "householdID" )
summary(model11)
#With controls
#Count observations for Model 12
model.lpm.voted.int.ctrl<-lm(voted ~ assignment+member+highprop+assignment*member+assignment*highprop+educationscale+incomescale+age+female+black+asian+latinx+block,data=df)
nobs(model.lpm.voted.int.ctrl)
#Model 12
model12 <- miceadds::lm.cluster( data = df , formula = voted ~ assignment+member+highprop+assignment*member+assignment*highprop+educationscale+incomescale+age+female+black+asian+latinx+block, 
                                                     cluster = "householdID" )
summary(model12)


#HETEROGENEOUS TREATMENT EFFECTS ON VOTER TURNOUT

#Treatment effect on voting among low-propensity voters
#Count observations for Model 13
model.lpm.vote.lowprop<-lm(voted~assignment+block,data=df.lowprop)
nobs(model.lpm.vote.lowprop)
#Model 13
model13 <- miceadds::lm.cluster( data = df.lowprop , formula = voted ~ assignment+block, 
                                                   cluster = "householdID" )
summary(model13)

#Treatment effect on voting among medium-low-propensity voters
#Count observations for Model 14
model.lpm.vote.medlow<-lm(voted~assignment+block,data=df.medlow)
nobs(model.lpm.vote.medlow)
#Model 14
model14 <- miceadds::lm.cluster( data = df.medlow , formula = voted ~ assignment+block, 
                                                  cluster = "householdID" )
summary(model14)

#Treatment effect on voting among medium-high-propensity voters
#Count observations for Model 15
model.lpm.vote.medhigh<-lm(voted~assignment+block,data=df.medhigh)
nobs(model.lpm.vote.medhigh)
#Model 15
model15 <- miceadds::lm.cluster( data = df.medhigh , formula = voted ~ assignment+block, 
                                 cluster = "householdID" )
summary(model15)

#Treatment effect on voting among high-propensity voters
#Count observations for Model 16
model.lpm.vote.highprop<-lm(voted~assignment+block,data=df.highprop)
nobs(model.lpm.vote.highprop)
#Model 16
model16 <- miceadds::lm.cluster( data = df.highprop , formula = voted ~ assignment+block, 
                                 cluster = "householdID" )
summary(model16)

#FOOTNOTES
#Estimate canvassing contact rate among high-propensity voters
table(df.highprop$canvassed)
275/(1413+275) #16.29 percent

#Estimate canvassing contact rate among low-propensity voters
table(df.lowprop$canvassed)
418/(3370+418) #11.03 percent
